# This file includes statistical analyses and code used
# to create all Tables that appear in the Main Manuscript (Tables 1, 2 and 3).
# Code is designed to run sequentially
# File should be run sequentially after coding files 1-3

############################################################################
# Table 1: Mean and Difference from Private Punishment Preference
############################################################################

# I conduct simple paired statistical significance tests for difference between variables

# general function to analyze data
ttgroupfct <- function(V1, V2, DTA){
  C1 <- mean(DTA[, V2], na.rm=TRUE)
  C2 <- sd(DTA[, V2], na.rm=TRUE)
  C3 <- mean(DTA[, V2], na.rm=TRUE)- mean(DTA[,V1], na.rm=TRUE)
  C4 <- t.test(DTA[, V2] , DTA[, V1], paired=TRUE)$p.value
  R <- cbind(C1, C2, C3, C4)
  return(R)
}


# create 3 data frames
R0.VIOL <- data.frame(round(data.frame(mean(D3$PRIV1.RC2.VIOL, na.rm=TRUE), sd(D3$PRIV1.RC2.VIOL, na.rm=TRUE)),3), NA, NA)
colnames(R0.VIOL) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")

R0.DV <- data.frame(round(data.frame(mean(D3$PRIV1.RC2.DV, na.rm=TRUE), sd(D3$PRIV1.RC2.DV, na.rm=TRUE)),3), NA, NA)
colnames(R0.DV) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")

R0.VOL <- data.frame(round(data.frame(mean(D3$PRIV1.RC2.VOL, na.rm=TRUE), sd(D3$PRIV1.RC2.VOL, na.rm=TRUE)),3), NA, NA)
colnames(R0.VOL) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")

# run function on data frame for each set of comparisons (rape)
R1 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VIOL", V2="PUB1.RC2.VIOL")
R2 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VIOL", V2="GCHOIX.RC2.VIOL")
R3 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VIOL", V2="PRIV2.RC2.VIOL")
T.VIOL <- round(data.frame(rbind(R1, R2, R3)), 3)
colnames(T.VIOL) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")
T.VIOL <- rbind(R0.VIOL, T.VIOL)
rownames(T.VIOL) <- c("Private","Publicly Expressed", "Group Choice", "Post Discussion Private")

#repeat (wife-beating)
R1 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.DV", V2="PUB1.RC2.DV")
R2 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.DV", V2="GCHOIX.RC2.DV")
R3 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.DV", V2="PRIV2.RC2.DV")
T.DV <- round(data.frame(rbind(R1, R2, R3)), 3)
colnames(T.DV) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")
T.DV <- rbind(R0.DV, T.DV)
rownames(T.DV) <- c("Private","Publicly Expressed", "Group Choice", "Post Discussion Private")

#repeat (theft)
R1 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VOL", V2="PUB1.RC2.VOL")
R2 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VOL", V2="GCHOIX.RC2.VOL")
R3 <- ttgroupfct(DTA=D3, V1="PRIV1.RC2.VOL", V2="PRIV2.RC2.VOL")
T.VOL <- round(data.frame(rbind(R1, R2, R3)), 3)
colnames(T.VOL) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")
T.VOL <- rbind(R0.VOL, T.VOL)
rownames(T.VOL) <- c("Private","Publicly Expressed", "Group Choice", "Post Discussion Private")

#refine data frame
Diffmat <- data.frame(rbind(T.VIOL, T.DV, T.VOL), check.names = FALSE)
colnames(Diffmat) <- c("Mean", "SD", "Diff from Private 1", "P-value of Diff")
rownames(Diffmat) <- c("Private","Publicly Expressed", "Group Choice", "Post Discussion Private", "Private ","Publicly Expressed ", "Group Choice ", "Post Discussion Private ", "Private  ","Publicly Expressed  ", "Group Choice  ", "Post Discussion Private  ")

#output table
kable(Diffmat, "rst", caption = "Mean and Difference from Private Punishment Preference", booktabs = T) %>% 
  kable_styling()  %>%
  pack_rows("Rape", 1, 4) %>%
  pack_rows("Wife-beating", 5, 8) %>%
  pack_rows("Theft", 9, 12)

############################################################################
# Table 2: Difference across all punishment preferences
############################################################################

# function to calculate differences and statistical significance between all preference types
ttgroupfct2 <- function(V1, V2, V3, V4, DTA){
  R <-  vector(mode = "list")
  
  C1 <- round(mean(DTA[, V2], na.rm=TRUE)- mean(DTA[,V1], na.rm=TRUE),2)
  C1P <- t.test(DTA[, V2] , DTA[, V1], paired=TRUE)$p.value
  
  C2 <- round(mean(DTA[, V3], na.rm=TRUE)- mean(DTA[,V1], na.rm=TRUE),2)
  C2P <- t.test(DTA[, V3] , DTA[, V1], paired=TRUE)$p.value
  
  C3 <- round(mean(DTA[, V4], na.rm=TRUE)- mean(DTA[,V1], na.rm=TRUE),2)
  C3P <- t.test(DTA[, V4] , DTA[, V1], paired=TRUE)$p.value
  
  C4 <- round(mean(DTA[, V3], na.rm=TRUE)- mean(DTA[,V2], na.rm=TRUE),2)
  C4P <- t.test(DTA[, V3] , DTA[, V2], paired=TRUE)$p.value
  
  C5 <- round(mean(DTA[, V4], na.rm=TRUE)- mean(DTA[,V2], na.rm=TRUE),2)
  C5P <- t.test(DTA[, V4] , DTA[, V2], paired=TRUE)$p.value
  
  C6 <- round(mean(DTA[, V4], na.rm=TRUE)- mean(DTA[,V3], na.rm=TRUE),2)
  C6P <- t.test(DTA[, V4] , DTA[, V3], paired=TRUE)$p.value
  
  R$diff <- cbind(C1, C2, C3, C4, C5, C6)
  R$pval <- cbind(C1P, C2P, C3P, C4P, C5P, C6P)
  
  return(R)
}

# function to indicate statistical significance with stars
formatfct <- function(V1, V2, V3, V4, DTA=DTA){
  
  DIFF <- ttgroupfct2(DTA=D3, V1=V1, V2=V2, V3=V3, V4=V4)
  
  mystars <- ifelse(DIFF$pval < .001, "***", ifelse(DIFF$pval < .01, "** ", ifelse(DIFF$pval < .05, "* ", " ")))
  
  DIFFP <- matrix(paste(DIFF$diff, mystars, sep=""))
  
  MAT <- matrix(data = "", nrow = 3, ncol = 3)
  
  MAT[1,1] <- DIFFP[1]
  MAT[2,1] <- DIFFP[2]
  MAT[3,1] <- DIFFP[3]
  MAT[2,2] <- DIFFP[4]
  MAT[3,2] <- DIFFP[5]
  MAT[3,3] <- DIFFP[6]
  MAT <- data.frame(MAT)
  rownames(MAT) <- c("Diff: Public - X", "Diff: Group - X", "Diff: PostDisc Private - X")
  colnames(MAT) <- c("X = Private", "X = Public", "X = Group")
  
  return(MAT)
}

# apply functions to compare differences between each preference type for each crime
DIFMAT2.VIOL <- formatfct(DTA=D3, V1="PRIV1.RC2.VIOL", V2="PUB1.RC2.VIOL", V3="GCHOIX.RC2.VIOL", V4="PRIV2.RC2.VIOL")
DIFMAT2.DV <- formatfct(DTA=D3, V1="PRIV1.RC2.DV", V2="PUB1.RC2.DV", V3="GCHOIX.RC2.DV", V4="PRIV2.RC2.DV")
DIFMAT2.VOL <- formatfct(DTA=D3, V1="PRIV1.RC2.VOL", V2="PUB1.RC2.VOL", V3="GCHOIX.RC2.VOL", V4="PRIV2.RC2.VOL")

# refine data frame
Diffmat2 <- data.frame(rbind(DIFMAT2.VIOL, DIFMAT2.DV, DIFMAT2.VOL))
colnames(Diffmat2) <- c("X = Private", "X = Public", "X = Group")
rownames(Diffmat2) <- c("Diff: Public - X", "Diff: Group - X", "Diff: PostDisc Private - X",
                        "Diff: Public - X ", "Diff: Group - X ", "Diff: PostDisc Private - X ", 
                        "Diff: Public - X  ", "Diff: Group - X  ", "Diff: PostDisc Private - X  " )

#output table
kable(Diffmat2, "rst", caption = "Difference across all Punishment Preferences", booktabs = T) %>% 
  kable_styling() %>%
  add_footnote(c("The column variable mean is subtracted from the row variable mean","Significance thresholds indicated by p<=.001***; p<=.05**; p<=.01*"), notation="alphabet") %>% 
  pack_rows("Rape", 1, 3) %>%
  pack_rows("Wife-beating", 4, 6) %>%
  pack_rows("Theft", 7, 9)

############################################################################
# Table 3: Determinants of Post Discussion Private Preferences
############################################################################

# function for clustered standard errors applied in regressions
cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  not <- attr(fm$model,"na.action")
  if(!is.null(not)){   
    cluster <- cluster[-not]
    dat <- dat[-not,]
  }
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

#linear regressions of private preferences, group choice and their interaction on post-discussion private preferences
# fixed effects by focus group
# clustered standard errors at the village level

#rape
I2.VIOL <- lm(PRIV2.RC2.VIOL ~ PRIV1.RC2.VIOL* GCHOIX.RC2.VIOL + FEMALE + EDU + RENCONAN + HOMOGENE + VIVEVILLN + LEVIOL.FG + GUERREAPRES2011 + as.factor(VFGID3), data=D3)
I2C.VIOL <- cl(dat=D3, fm=I2.VIOL, cluster=D3$VILLID)

#wife-beating
I2.DV <- lm(PRIV2.RC2.DV ~ PRIV1.RC2.DV*GCHOIX.RC2.DV + FEMALE + EDU + RENCONAN + HOMOGENE + VIVEVILLN + LAVIOLENCEDOMESTIQUE.FG + GUERREAPRES2011 + as.factor(VFGID3), data=D3)
I2C.DV <- cl(dat=D3, fm=I2.DV, cluster=D3$VILLID)

#theft
I2.VOL <- lm(PRIV2.RC2.VOL ~ PRIV1.RC2.VOL*GCHOIX.RC2.VOL + FEMALE + EDU + RENCONAN + HOMOGENE + VIVEVILLN + LEVOL.FG + GUERREAPRES2011 + as.factor(VFGID3), data=D3)
I2C.VOL <- cl(dat=D3, fm=I2.VOL, cluster=D3$VILLID)

#stargazer function
starfcti <- function(A, B, C, AC, BC, CC, TI){stargazer(A, B, C, type = "text", 
                                                        title = TI, 
                                                        se = list(AC[,2], BC[,2], CC[,2]), 
                                                        t = list(AC[,3], BC[,3], CC[,3]), 
                                                        p = list(AC[,4], BC[,4], CC[,4]), 
                                                        omit.stat = c("ser", "f"),
                                                        dep.var.labels = c("Rape","DV", "Theft"),
                                                        dep.var.caption  = "Dependent Variable: Punishment Preferences",
                                                        font.size = "small",
                                                        keep=c("PRIV1.RC2.VIOL", "GCHOIX.RC2.VIOL", "PRIV1.RC2.DV", "GCHOIX.RC2.DV", "PRIV1.RC2.VOL", "GCHOIX.RC2.VOL", "PRIV1.RC2.VIOL:GCHOIX.RC2.VIOL", "PRIV1.RC2.DV:GCHOIX.RC2.DV", "PRIV1.RC2.VOL:GCHOIX.RC2.VOL"),
                                                        omit = c("VFGID3", "EDU"),
                                                        omit.labels=c("Focus Group Fixed Effects? (N=79)", "Controls?"),
                                                        no.space=TRUE,
                                                        #column.labels   = c("Basic", "Interacted","Basic", "Interacted","Basic", "Interacted"),
                                                        #column.separate = c(3, 3),
                                                        notes = "Standard Errors clustered at the Village level (20)",
                                                        #intercept.bottom = FALSE,
                                                        covariate.labels = c("Rape: Private", "Rape: Group", "DV: Private", "DV: Group", "Steal: Private", "Steal: Group", "Rape: Private x Group", "DV: Private x Group", "Steal: Private x Group"),
                                                        header=FALSE)
}

#apply for output
starfcti(A=I2.VIOL, B=I2.DV, C=I2.VOL, 
         AC=I2C.VIOL, BC=I2C.DV, CC=I2C.VOL, 
         TI="Determinants of Post Discussion Private Preferences")
